{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Coordinate Frame Conventions\n",
    "A review of coordinate frame conventions in Habitat."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Installation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Imports { display-mode: \"form\" }\n",
    "\n",
    "# [imports]\n",
    "import os\n",
    "\n",
    "import git\n",
    "import magnum as mn\n",
    "from PIL import Image\n",
    "\n",
    "import habitat_sim\n",
    "\n",
    "try:\n",
    "    # For using jupyter IO components\n",
    "    import IPython.display\n",
    "\n",
    "    IS_NOTEBOOK = True\n",
    "\n",
    "except ImportError:\n",
    "    IS_NOTEBOOK = False\n",
    "# [/imports]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "# @title Setup { display-mode: \"form\" }\n",
    "\n",
    "# [setup]\n",
    "# Choose quiet logging. See src/esp/core/Logging.h\n",
    "os.environ[\"HABITAT_SIM_LOG\"] = \"quiet\"\n",
    "\n",
    "# define path to data directory\n",
    "repo = git.Repo(\".\", search_parent_directories=True)\n",
    "dir_path = repo.working_tree_dir\n",
    "data_path = os.path.join(dir_path, \"data\")\n",
    "\n",
    "# images will be either displayed in the notebook or saved as image files\n",
    "if not IS_NOTEBOOK:\n",
    "    output_directory = \"examples/tutorials/coordinate_system_tutorial_output/\"\n",
    "    output_path = os.path.join(dir_path, output_directory)\n",
    "    os.makedirs(output_path, exist_ok=True)\n",
    "\n",
    "# define some constants and globals the first time we run:\n",
    "opacity = 1.0\n",
    "red = mn.Color4(1.0, 0.0, 0.0, opacity)\n",
    "green = mn.Color4(0.0, 1.0, 0.0, opacity)\n",
    "blue = mn.Color4(0.0, 0.0, 1.0, opacity)\n",
    "white = mn.Color4(1.0, 1.0, 1.0, opacity)\n",
    "\n",
    "origin = mn.Vector3(0.0, 0.0, 0.0)\n",
    "eye_pos0 = mn.Vector3(2.5, 1.3, 1)\n",
    "eye_pos1 = mn.Vector3(3.5, 3.0, 4.5)\n",
    "obj_axes_len = 0.4\n",
    "\n",
    "if \"sim\" not in globals():\n",
    "    global sim\n",
    "    sim = None\n",
    "    global sensor_node\n",
    "    sensor_node = None\n",
    "    global lr\n",
    "    lr = None\n",
    "    global image_counter\n",
    "    image_counter = 0\n",
    "# [/setup]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Utilities { display-mode: \"form\" }\n",
    "\n",
    "\n",
    "# [utils]\n",
    "def create_sim_helper(scene_id):\n",
    "    global sim\n",
    "    global sensor_node\n",
    "    global lr\n",
    "\n",
    "    # clean-up the current simulator instance if it exists\n",
    "    if sim != None:\n",
    "        sim.close()\n",
    "\n",
    "    sim_cfg = habitat_sim.SimulatorConfiguration()\n",
    "    sim_cfg.scene_dataset_config_file = os.path.join(\n",
    "        data_path, \"replica_cad/replicaCAD.scene_dataset_config.json\"\n",
    "    )\n",
    "    sim_cfg.scene_id = scene_id\n",
    "    sim_cfg.enable_physics = True  # for ReplicaCAD articulated furniture\n",
    "\n",
    "    agent_cfg = habitat_sim.agent.AgentConfiguration()\n",
    "    rgb_sensor_spec = habitat_sim.CameraSensorSpec()\n",
    "    rgb_sensor_spec.uuid = \"color_sensor\"\n",
    "    rgb_sensor_spec.sensor_type = habitat_sim.SensorType.COLOR\n",
    "    rgb_sensor_spec.resolution = [768, 1024]\n",
    "    rgb_sensor_spec.position = [0.0, 0.0, 0.0]\n",
    "    agent_cfg.sensor_specifications = [rgb_sensor_spec]\n",
    "\n",
    "    cfg = habitat_sim.Configuration(sim_cfg, [agent_cfg])\n",
    "    sim = habitat_sim.Simulator(cfg)\n",
    "\n",
    "    # This tutorial doesn't involve agent concepts. We want to directly set\n",
    "    # camera transforms in world-space (the world's coordinate frame). We set\n",
    "    # the agent transform to identify and then return the sensor node.\n",
    "    sim.initialize_agent(0)\n",
    "    agent_node = sim.get_agent(0).body.object\n",
    "    agent_node.translation = [0.0, 0.0, 0.0]\n",
    "    agent_node.rotation = mn.Quaternion()\n",
    "    sensor_node = sim._sensors[\"color_sensor\"]._sensor_object.object\n",
    "\n",
    "    lr = sim.get_debug_line_render()\n",
    "    lr.set_line_width(3)\n",
    "\n",
    "\n",
    "def show_img(rgb_obs):\n",
    "    global image_counter\n",
    "\n",
    "    colors = []\n",
    "    for row in rgb_obs:\n",
    "        for rgba in row:\n",
    "            colors.extend([rgba[0], rgba[1], rgba[2]])\n",
    "\n",
    "    resolution_x = len(rgb_obs[0])\n",
    "    resolution_y = len(rgb_obs)\n",
    "\n",
    "    colors = bytes(colors)\n",
    "    img = Image.frombytes(\"RGB\", (resolution_x, resolution_y), colors)\n",
    "    if IS_NOTEBOOK:\n",
    "        IPython.display.display(img)\n",
    "    else:\n",
    "        filepath = f\"{output_directory}/{image_counter}.png\"\n",
    "        img.save(filepath)\n",
    "        print(f\"Saved image: {filepath}\")\n",
    "        image_counter += 1\n",
    "\n",
    "\n",
    "def show_scene(camera_transform):\n",
    "    sensor_node.transformation = camera_transform\n",
    "    observations = sim.get_sensor_observations()\n",
    "    show_img(observations[\"color_sensor\"])\n",
    "\n",
    "\n",
    "def draw_axes(translation, axis_len=1.0):\n",
    "    lr = sim.get_debug_line_render()\n",
    "    # draw axes with x+ = red, y+ = green, z+ = blue\n",
    "    lr.draw_transformed_line(translation, mn.Vector3(axis_len, 0, 0), red)\n",
    "    lr.draw_transformed_line(translation, mn.Vector3(0, axis_len, 0), green)\n",
    "    lr.draw_transformed_line(translation, mn.Vector3(0, 0, axis_len), blue)\n",
    "\n",
    "\n",
    "def calc_camera_transform(\n",
    "    eye_translation=mn.Vector3(1, 1, 1), lookat=mn.Vector3(0, 0, 0)\n",
    "):\n",
    "    # choose y-up to match Habitat's y-up convention\n",
    "    camera_up = mn.Vector3(0.0, 1.0, 0.0)\n",
    "    return mn.Matrix4.look_at(eye_translation, lookat, camera_up)\n",
    "\n",
    "\n",
    "# [/utils]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "# Coordinate frame conventions: y-up and right-handed\n",
    "Create a sim with an empty scene. Draw the world axes at the origin, with colors x+ = red, y+ = green, z+ = blue. Note conventions: y-up (green vector) and right-handed (https://en.wikipedia.org/wiki/Right-hand_rule)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# [empty_scene_coord_frame]\n",
    "create_sim_helper(scene_id=\"NONE\")\n",
    "draw_axes(origin)\n",
    "show_scene(calc_camera_transform(eye_translation=eye_pos0, lookat=origin))\n",
    "# [/empty_scene_coord_frame]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "# Loading a ReplicaCAD scene\n",
    "Draw the world axes again."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# [replica_cad_scene_coord_frame]\n",
    "create_sim_helper(\n",
    "    scene_id=os.path.join(\n",
    "        data_path, \"replica_cad/configs/scenes/v3_sc0_staging_00.scene_instance.json\"\n",
    "    )\n",
    ")\n",
    "draw_axes(origin)\n",
    "show_scene(calc_camera_transform(eye_translation=eye_pos0, lookat=origin))\n",
    "# [/replica_cad_scene_coord_frame]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "# A rigid object's local coordinate frame\n",
    "Add two chairs in different poses. The local origin is roughly at the center of mass. The local up axis is y+ (green vector) and the local forward axis is z- (negation of blue z+ vector). These conventions depends on how the object model is authored. ReplicaCAD object models follow these conventions, but models from other datasets may differ."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# [rigid_object_coord_frame]\n",
    "rigid_obj_mgr = sim.get_rigid_object_manager()\n",
    "obj_template = os.path.join(\n",
    "    data_path, \"replica_cad/configs/objects/frl_apartment_chair_01.object_config.json\"\n",
    ")\n",
    "\n",
    "# add two chair objects to the scene\n",
    "obj0 = rigid_obj_mgr.add_object_by_template_handle(obj_template)\n",
    "obj1 = rigid_obj_mgr.add_object_by_template_handle(obj_template)\n",
    "\n",
    "# pose the first chair at the origin with identity rotation\n",
    "obj0.translation = mn.Vector3(0.0, 0.0, 0.0)\n",
    "obj0.rotation = mn.Quaternion()\n",
    "\n",
    "# pose the second chair with an arbitrary translation and rotation\n",
    "obj1.translation = mn.Vector3(1.1, 0.4, 1.2)\n",
    "up_axis = mn.Vector3(0, 1, 0)\n",
    "obj1.rotation = mn.Quaternion.rotation(mn.Deg(-60.0), up_axis) * mn.Quaternion.rotation(\n",
    "    mn.Deg(20.0), mn.Vector3(0, 0, 1)\n",
    ")\n",
    "\n",
    "for obj in [obj0, obj1]:\n",
    "    # use DebugLineRender's push_transform to draw axes in each object's local coordinate frame.\n",
    "    lr.push_transform(obj.transformation)\n",
    "    draw_axes(origin, axis_len=obj_axes_len)\n",
    "    lr.pop_transform()\n",
    "\n",
    "# save the camera transform for use in the next block\n",
    "camera_transform = calc_camera_transform(eye_translation=eye_pos0, lookat=origin)\n",
    "show_scene(camera_transform)\n",
    "# [/rigid_object_coord_frame]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "# Camera coordinate frame\n",
    "Let's look more closely at the transform of the camera used for the previous image. The camera's local axes are similar to the chair: right = red = x+, up = green = y+, forward (into the scene) = z- (negation of blue z+ vector)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# [camera_coord_frame]\n",
    "# draw the previous camera's local axes\n",
    "lr.push_transform(camera_transform)\n",
    "draw_axes(origin, axis_len=obj_axes_len)\n",
    "# draw some approximate edges of the previous camera's frustum\n",
    "fx = 2\n",
    "fy = 1.5\n",
    "fz = 4\n",
    "lr.draw_transformed_line(origin, mn.Vector3(-fx, -fy, -fz), white)\n",
    "lr.draw_transformed_line(origin, mn.Vector3(fx, -fy, -fz), white)\n",
    "lr.draw_transformed_line(origin, mn.Vector3(-fx, fy, -fz), white)\n",
    "lr.draw_transformed_line(origin, mn.Vector3(fx, fy, -fz), white)\n",
    "lr.pop_transform()\n",
    "\n",
    "# Show the scene from a position slightly offset from the previous camera.\n",
    "eye_offset = mn.Vector3(0.5, 0.75, 1.75)\n",
    "show_scene(calc_camera_transform(eye_translation=eye_pos0 + eye_offset, lookat=origin))\n",
    "# [/camera_coord_frame]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "# More object coordinate frames\n",
    "Let's show the local origins and axes of all rigid objects in the scene."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# [more_objects_coord_frame]\n",
    "obj_dict = rigid_obj_mgr.get_objects_by_handle_substring()\n",
    "for _, obj in obj_dict.items():\n",
    "    lr.push_transform(obj.transformation)\n",
    "    draw_axes(origin, axis_len=obj_axes_len)\n",
    "    lr.pop_transform()\n",
    "\n",
    "show_scene(calc_camera_transform(eye_translation=eye_pos1, lookat=origin))\n",
    "# [/more_objects_coord_frame]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "# Beware loading a GLB as a scene!\n",
    "Let's re-create the sim and load a chair GLB as a scene. Beware, this is an example of what *not* to do! This is a legacy codepath to support loading GLB scenes from the MP3D dataset (not shown here). One quirk of this legacy codepath is that it rotates the model 90 degrees!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "# [glb_scene_coord_frame]\n",
    "create_sim_helper(\n",
    "    scene_id=os.path.join(data_path, \"replica_cad/objects/frl_apartment_chair_01.glb\")\n",
    ")\n",
    "draw_axes(origin)\n",
    "show_scene(calc_camera_transform(eye_translation=eye_pos0, lookat=origin))\n",
    "# [/glb_scene_coord_frame]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Blender conventions and sources of confusion\n",
    "Blender is an open-source 3D-modeling tool that we on the Habitat team often use. We describe two caveats here:\n",
    "1. Its convention is z-up, e.g. the default 3D camera is oriented such that z is up.\n",
    "2. Blender automatically rotates gltf/glb models on import (essentially making the assumption that they were authored as y-up). It also reverses this rotation on export (see `+Y Up` gltf exporter option; enabled by default). The rotation is 90 degrees about the local x axis.\n",
    "\n",
    "Here, we've imported `frl_apartment_chair_01.glb` and taken a screenshot. Note the axes with the same colors used elsewhere in this tutorial: red = x+, green = y+, and blue = z+. Compare this image to the section above, `A rigid object's local coordinate frame`. The local axes are different, but the chair still appears upright.\n",
    "\n",
    "![Blender Chair](https://user-images.githubusercontent.com/6557808/134411206-eeff1529-04ab-4f20-bc7c-68102f2879f1.png)\n",
    "\n",
    "Let's consider an individual vertex stored in a mesh in a gltf file. Suppose the vertex's local `(x,y,z)` position is `(1,2,3)`. After importing into blender, the vertex's local position in Blender will be `(1,-3,2)`. If the mesh is re-exported as a gltf, the vertex will be written to the file as `(1,2,3)`."
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "jupytext": {
   "cell_metadata_filter": "-all",
   "formats": "nb_python//py:percent,notebooks//ipynb",
   "notebook_metadata_filter": "all"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
